# analysis: public versus private hospital results in new south wales

library(tidyverse)
library(magrittr)
library(haven)
library(readxl)
library(epitools)
library(lubridate)

procedure = "prostat" # or = pancreat, neph

cohort <- read_sas(paste0("Z:/LowValueCare/APDC/on-ny-nsw/",procedure,"cohort.sas7bdat"))
cohort_work <- cohort

# create table primary_payer X year X age X sex X hospital_type X income
#####################################################################
cohort_work %<>% mutate(year = year(index_date))
source("neighbourhood-income.R")

cohort_work <- sa2_income(cohort_work,sa2_cor)

cohort_work %<>% 
  mutate(neighbourhood_income = fct_relevel(neighbourhood_income,c("1","2","3","4","5")))

cohort_work %<>% mutate(phi_status = if_else(primary_payer == "phi","insured","no_insurance"))

cohort_work %<>% mutate(age_group_table = case_when(age < 20 ~ "< 20",
                                             20 <= age & age < 25 ~ "20-24",
                                             25 <= age & age < 30 ~ "25-29",
                                             30 <= age & age < 35 ~ "30-34",
                                             35 <= age & age < 40 ~ "35-39",
                                             40 <= age & age < 45 ~ "40-44",
                                             45 <= age & age < 50 ~ "45-49",
                                             50 <= age & age < 55 ~ "50-54",
                                             55 <= age & age < 60 ~ "55-59",
                                             60 <= age & age < 65 ~ "60-64",
                                             65 <= age & age < 70 ~ "65-69",
                                             70 <= age & age < 75 ~ "70-74",
                                             75 <= age & age < 80 ~ "75-79",
                                             80 <= age & age < 85 ~ "80-84",
                                             age >= 85  ~ "85+"))

cohort_out <- cohort_work %>%
  group_by(age_group_table,phi_status,facility_type,neighbourhood_income,year,sex) %>%
  summarise(procedure_count = n()) %>% ungroup()

cohort_out %<>%
  mutate(age_group_2 = case_when(age_group_table %in% c("< 20","20-24","25-29","30-34","35-39","40-44","45-49") ~
                                   "18-49",
                                 age_group_table %in% c("50-54","55-59") ~ "50-59",
                                 age_group_table %in% c("60-64","65-69") ~ "60-69",
                                 age_group_table %in% c("70-74","75-79") ~ "70-79",
                                 age_group_table %in% c("80-84","85+") ~ "80+"))


# table 1: hospital tables
#####################################################################
nsw_population <- read_sas("population_nsw.sas7bdat")
nsw_population %<>% select(-sa2_name) %>%
  group_by(age,sex,quintile_personal) %>%
  summarise(count = sum(count)) %>% ungroup()
nsw_population %<>% mutate(quintile_personal = as.factor(quintile_personal))

# 1a overall results
ont_population_age <- read_csv("Copy of most recent pops and proc counts.csv")
ont_population_age %<>% filter(region=="ON")
ont_population_age %<>% select(age,sex,population) %>% distinct()
ont_population_age %<>% 
  mutate(age_group_rename = if_else(age=="under50","18-49",age))
ont_population_age %<>%
  mutate(sex_rename = case_when(sex == "male" ~ "1",
                                sex == "female" ~ "2"))
ont_population_age %<>% rename(ontario_population = population)

cohort_out_1a <- cohort_out %>%
  group_by(facility_type,sex,age_group_2) %>%
  summarise(procedure_count = sum(procedure_count)) %>% ungroup()

nsw_population_1a <- nsw_population %>%
  select(-quintile_personal) %>%
  group_by(age,sex) %>%
  summarise(count = sum(count)) %>% ungroup()

table1a <- nsw_population_1a %>%
  left_join(ont_population_age %>% select(-age,-sex),by=c("age"="age_group_rename","sex"="sex_rename"))
table1a %<>%
  left_join(cohort_out_1a,by=c("age" = "age_group_2","sex" = "sex"))

table1a %<>% 
  mutate(procedure_py = procedure_count/(63/12))

if (procedure == "prostat"){
  table1a %<>% filter(sex == "1")
}

table1a %>%
  mutate(count_allyears = count*(63/12),
         stdcount_on = (procedure_count/count_allyears)*ontario_population) %>%
  group_by(facility_type) %>%
  summarise(n_procedures = sum(procedure_count),
            crude_rate = ageadjust.direct(count = procedure_count,
                                            pop = count_allyears,
                                            stdpop = ontario_population)[1]*100000,
            adj_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[2]*100000,
            cil_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[3]*100000,
            ciu_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[4]*100000) %>%
  mutate(adj_rate_print = paste0(round(adj_rate,2), " (",round(cil_rate,2),", ",round(ciu_rate,2),")")) %>%
  write_csv(path = paste0(here::here(),"/table1a_",procedure,".csv"))
  

# 1b income results
ont_population <- read_csv("Copy of most recently used income quintile rate data.csv")
ont_population %<>% filter(region=="ON")
ont_population %<>% select(iq,age,sex,population) %>% distinct()
ont_population %<>% 
  mutate(age_group_rename = if_else(age=="under50","18-49",age))
ont_population %<>%
  mutate(sex_rename = case_when(sex == "Male" ~ "1",
                                sex == "Female" ~ "2"))
ont_population %<>% rename(ontario_population = population)
ont_population %<>% mutate(income_quintile = as.factor(iq))

cohort_out_1 <- cohort_out %>%
  group_by(facility_type,sex,neighbourhood_income,age_group_2) %>%
  summarise(procedure_count = sum(procedure_count)) %>% ungroup()

nsw_population %<>% filter(!is.na(quintile_personal))

table1 <- nsw_population %>%
  left_join(ont_population %>% select(-age,-sex,-iq),by=c("age"="age_group_rename","sex"="sex_rename",
                                "quintile_personal"="income_quintile"))
table1 <- table1 %>%
  left_join(cohort_out_1,by=c("age" = "age_group_2","sex" = "sex","quintile_personal" = "neighbourhood_income"))

# # prostatectomy adjustment
if (procedure == "prostat"){
  table1 %<>%
    filter(sex=="1")
}

table1 %<>%
  mutate(count_allyears = count*(63/12),
         stdcount_on = procedure_count/count_allyears * ontario_population)

table1 %>%
  group_by(facility_type,quintile_personal) %>%
  summarise(n_procedures = sum(procedure_count),
            crude_rate = ageadjust.direct(count = procedure_count,
                                            pop = count_allyears,
                                            stdpop = ontario_population)[1]*100000,
            adj_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[2]*100000,
            cil_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[3]*100000,
            ciu_rate = ageadjust.direct(count = procedure_count,
                                          pop = count_allyears,
                                          stdpop = ontario_population)[4]*100000) %>%
  mutate(adj_rate_print = paste0(round(adj_rate,2), " (",round(cil_rate,2),", ",round(ciu_rate,2),")")) %>%
  write_csv(path = paste0("table1_direct_",procedure,".csv"))